knitr::opts_chunk$set(echo = TRUE, 
                      warning = FALSE, 
                      message = FALSE, 
                      fig.align = 'center')
knitr::opts_knit$set(root.dir = "/mnt/raid5/Personal/tangchao/project/Nanopore/BarcodeDecomplex")
library(GenomicFeatures)
library(ICAS)
library(BioHelper)
library(Biostrings)
txdb <- GenomicFeatures::makeTxDbFromGFF("/mnt/raid61/Personal_data/songjunwei/reference/plasmoDB/gff/PlasmoDB-53_PbergheiANKA.gff")
ref <- readDNAStringSet("/mnt/raid61/Personal_data/songjunwei/reference/plasmoDB/fa/PlasmoDB-53_PbergheiANKA_Genome.fasta")
names(ref) <- mapply(function(x) x[1], strsplit(names(ref), " "))

introns <- unlist(intronsByTranscript(txdb))

SJ <- data.table(SJ = as.character(introns), 
           start = paste0(as.character(seqnames(introns)), ":", start(introns)), 
           end = paste0(as.character(seqnames(introns)), ":", end(introns)))
SJ[, .N, start][N > 1]
SJ[, .N, end][N > 1]
SJ[start == "PbANKA_01_v3:438321"]
SJ[end == "PbANKA_01_v3:439388"]

introns <- BioHelper::DonorSiteSeq(introns, Genome = ref, exon = 0, intron = 2)
introns <- BioHelper::AcceptorSiteSeq(introns, Genome = ref, exon = 0, intron = 2)


v5 <- mapply(with(introns, paste0(DonorMotif, AcceptorMotif)), FUN = function(x) {
  if(x == "GTAG") {
    1
  } else {
    if(x == "GCAG") {
      3
    } else {
      if(x == "ATAC") {
        5
      } else {
        0
      }
    }
  }
})

intronsTab <- data.table(V1 = as.character(seqnames(introns)), 
                         V2 = start(introns), 
                         V3 = end(introns), 
                         V4 = mapply(as.character(strand(introns)), FUN = function(x) ifelse(x == "+", 1, 2)), 
                         V5 = v5, 
                         V6 = 1, 
                         V7 = 100, 
                         V8 = 0, 
                         V9 = 50)

fwrite(intronsTab, "/mnt/raid61/Personal_data/tangchao/Temp/20211025/05.TranscriptClean/GTF_SJ.txt", col.names = F, row.names = F, quote = F, sep = "\t")

writeXStringSet(ref, "/mnt/raid61/Personal_data/tangchao/Temp/20211025/05.TranscriptClean/PlasmoDB-53_PbergheiANKA_Genome.fasta")
cd /mnt/raid61/Personal_data/tangchao/Temp/20211025/05.TranscriptClean
pyenv shell 3.7.4


for i in RTA-03 RTA-10 RTA-16 RTA-17 RTA-24 RTA-32; 
do
  samtools view -h /mnt/raid61/Personal_data/tangchao/Temp/20211025/02.SplitFastq/$i.bam > $i.sam
done


# Step2: ONT reads correction

for i in RTA-03 RTA-10 RTA-16 RTA-17 RTA-24 RTA-32; 
do
    python /mnt/data1/tangchao/software/TranscriptClean/TranscriptClean-2.0.2/TranscriptClean.py \
           --threads 8 \
           --sam /mnt/raid61/Personal_data/tangchao/Temp/20211025/05.TranscriptClean/$i.sam \
           --genome /mnt/raid61/Personal_data/tangchao/Temp/20211025/05.TranscriptClean/PlasmoDB-53_PbergheiANKA_Genome.fasta \
           --outprefix /mnt/raid61/Personal_data/tangchao/Temp/20211025/05.TranscriptClean/$i \
           --tmpDir /mnt/raid61/Personal_data/tangchao/Temp/20211025/05.TranscriptClean/temp > /mnt/raid61/Personal_data/tangchao/Temp/20211025/05.TranscriptClean/$i.TranscriptClean.log 2>&1
done


for i in RTA-03 RTA-10 RTA-16 RTA-17 RTA-24 RTA-32;
do
    #statements
    minimap2 -ax splice -t 10 /mnt/raid61/Personal_data/tangchao/Temp/20211025/05.TranscriptClean/PlasmoDB-53_PbergheiANKA_Genome.fasta /mnt/raid61/Personal_data/tangchao/Temp/20211025/05.TranscriptClean/$i\_clean.fa | samtools sort -@ 4 | samtools view -b > /mnt/raid61/Personal_data/tangchao/Temp/20211025/05.TranscriptClean/$i.minimap2genome.bam
done
cp -R /mnt/raid61/Personal_data/tangchao/Temp/20211025/05.TranscriptClean /mnt/raid5/Personal/tangchao/project/Nanopore/BarcodeDecomplex/analysis/09.AlternativeSplicing/01.Plasmodium/20211025
cat RTA-03_clean.fa RTA-10_clean.fa RTA-16_clean.fa > tro.fa
cat RTA-17_clean.fa RTA-24_clean.fa RTA-32_clean.fa > sch.fa

for i in tro sch;
do
    #statements
    minimap2 -ax splice -t 10 /mnt/raid61/Personal_data/tangchao/Temp/20211025/05.TranscriptClean/PlasmoDB-53_PbergheiANKA_Genome.fasta /mnt/raid61/Personal_data/tangchao/Temp/20211025/05.TranscriptClean/$i.fa | samtools sort -@ 4 | samtools view -b > /mnt/raid61/Personal_data/tangchao/Temp/20211025/05.TranscriptClean/$i.minimap2genome.bam
done


tangchao7498/DecodeR documentation built on May 27, 2023, 7:21 p.m.